// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#ifndef GP_H
#define GP_H

#include "degub.h"
#include "hardware.h"
#include <d3d9.h>
#include <d3dx9.h>
#include <hash_map>
#include <iosfwd>
#include <string>
#include <utility>

// Forward declarations of types that GP refers to below
// (CompiledDisplayList in the display-list typedefs/map, CONVERTICES as a
// by-reference parameter of GP::getConvertices). Their definitions are not
// part of this header.
class CompiledDisplayList;
struct CONVERTICES;

// Result codes associated with the GP thread.
// _GPTR_STILL_ACTIVE takes the value of the STILL_ACTIVE macro, presumably so
// that a thread exit code can be compared directly against this enum
// (TODO confirm against the code that queries the thread's exit code).
enum GPThreadResult {
	GPTR_SUCCESS = 0,
	GPTR_ERROR = 1,
	_GPTR_STILL_ACTIVE = STILL_ACTIVE
};

// Graphics processor ("GP") state and command interpreter that renders through
// a Direct3D 9 device.
//
// The class holds the FIFO read state, the CP/BP/XF register files, opcode and
// BP dispatch tables, a texture cache and vertex/pixel shader caches keyed by
// the relevant register state. It also owns a thread handle and an event
// handle (see getThread()/getEvent()).
//
// GP objects are not copyable: both the copy constructor and the copy
// assignment operator are declared private.
class GP {
public:
	// mem: memory interface used by the GP (kept by reference).
	// pd3dDevice: Direct3D 9 device to render with.
	// h: hardware object (kept as a const pointer).
	GP(MemInterface &mem, LPDIRECT3DDEVICE9 pd3dDevice, Hardware *h);
	~GP();
	HRESULT clear();
	HRESULT endScene();
	void fifo_set(); //call this when the FIFO read enable bit has been set
	// Accessors for the event and thread handles stored in the state block.
	HANDLE getEvent() const { return m.hEvent; }
	HANDLE getThread() const { return m.hThread; }
	// Returns the stored error text (see errorstring).
	const std::string &getErrorString() const { return errorstring; }
	bool endThread(); //the thread cannot be restarted
	// Writes verbose text to the supplied stream.
	void getVerboseText(std::ostream& str);
private:
	// Copying is disabled. The object holds a reference member, thread/event
	// handles and Direct3D interface pointers, so a member-wise copy would
	// alias all of them. These are declared private and are meant to stay
	// undefined (NOTE(review): confirm no definition exists in the .cpp).
	GP(const GP&);
	GP& operator=(const GP&);
	Hardware * const hw;
	MemInterface &mem;
	std::string errorstring;

	//FIFO reading and opcode execution
	const BYTE *queue_get_data(size_t size);
	BYTE queue_get_past_byte(size_t npos);
	DWORD queue_get_24bits();
	void waitForMoreData();
	void execute_opcode();
	void updateStopPt();
	unsigned handle_exception(const char *type, const char *error);
	void activate_gp();

	//Statistics
	//GP_STATISTICS is an X-macro: it invokes macro(id, "description") once
	//for every counter. It is deliberately left defined after this class.
#define GP_STATISTICS(macro) macro(ops, "Started ops")\
	macro(bytes_ndl, "Bytes not in Display list") macro(bytes_dl, "Bytes in Display list")\
	macro(obp, "BP Loads") macro(ocp, "CP Loads") macro(oxf, "XF Loads")\
	macro(odl, "Display list calls") macro(oxfa, "XF Index A Loads")\
	macro(oxfb, "XF Index B Loads") macro(oxfc, "XF Index C Loads")\
	macro(oxfd, "XF Index D Loads") macro(nops, "NOPs") macro(ivcs, "IVCs")\
	macro(odraw_quads, "Quad ops") macro(odraw_trilist, "Trilist ops")\
	macro(odraw_tristrip, "Tristrip ops") macro(odraw_trifan, "Trifan ops")\
	macro(odraw_linelist, "Linelist ops") macro(odraw_linestrip, "Linestrip ops")\
	macro(odraw_points, "Point ops")\
	macro(odraw_null, "0-vertex draw ops")\
	macro(quads, "Quads") macro(trislist, "Tris in list")\
	macro(trisstrip, "Tris in strip") macro(trisfan, "Tris in fan")\
	macro(lineslist, "Lines in list") macro(linesstrip, "Lines in strip")\
	macro(points, "Points")\
	macro(pixelshaders, "Pixel shaders") macro(ps_cachehit, "Pixel shader cache hits")\
	macro(vertexshaders, "Vertex shaders") macro(vs_cachehit, "Vertex shader cache hits")\
	macro(texcopies, "EFB->Texture copies")

	//Expands one GP_STATISTICS entry into a 64-bit counter member.
#define GPSTAT_DECLARE_INT(id, desc) __int64 id;
	//One 64-bit counter per GP_STATISTICS entry, plus a frame counter.
	//Two instances exist: stat and dlrStat (presumably the latter is used by
	//the dlr_* opcode variants - TODO confirm).
	struct STATS {
		GP_STATISTICS(GPSTAT_DECLARE_INT) __int64 frames;
		void dump();
	} stat, dlrStat;

	//main opcodes
	//Two 256-entry member-function tables indexed by opcode byte.
	void (GP::*m_arr_opcode[0x100])();
	void (GP::*m_rec_opcode[0x100])();
	//Declares each opcode handler twice: id() and a dlr_-prefixed variant.
#define DECLARE_GP_OPFUNC(id) void id(); void dlr_##id()
	DECLARE_GP_OPFUNC(unknown_opcode);
	DECLARE_GP_OPFUNC(nop);
	DECLARE_GP_OPFUNC(load_bp);
	DECLARE_GP_OPFUNC(load_cp);
	DECLARE_GP_OPFUNC(load_xf);
	DECLARE_GP_OPFUNC(load_xf_indx_a);
	DECLARE_GP_OPFUNC(load_xf_indx_b);
	DECLARE_GP_OPFUNC(load_xf_indx_c);
	DECLARE_GP_OPFUNC(load_xf_indx_d);
	DECLARE_GP_OPFUNC(call_list);
	DECLARE_GP_OPFUNC(update_metrics);
	DECLARE_GP_OPFUNC(invalidate_vertex_cache);
	DECLARE_GP_OPFUNC(draw_quads);
	DECLARE_GP_OPFUNC(draw_triangles);
	DECLARE_GP_OPFUNC(draw_triangle_strip);
	DECLARE_GP_OPFUNC(draw_triangle_fan);
	DECLARE_GP_OPFUNC(draw_lines);
	DECLARE_GP_OPFUNC(draw_line_strip);
	DECLARE_GP_OPFUNC(draw_points);
#undef DECLARE_GP_OPFUNC
	//bool strange_opcode();

	//bp handlers
	//m_arr_bp is a 256-entry table of handlers taking the DWORD payload.
	void bpload(DWORD d);
	void (GP::*m_arr_bp[0x100])(DWORD);
	void bp_direct(DWORD data);
	void bp_pe_zmode(DWORD data);
	void bp_pe_cmode0(DWORD data);
	void bp_pe_done(DWORD data);
	void bp_pe_token(DWORD data);
	void bp_pe_token_int(DWORD data);
	void bp_efb_topleft(DWORD data);
	void bp_efb_bottomright(DWORD data);
	void bp_pe_copy_execute(DWORD data);
	void bp_tx_invalidate(DWORD data);
	void bp_tx_setmode0(DWORD data);
	void bp_tx_setmode1(DWORD data);
	void bp_tx_setimage0(DWORD data);
	void bp_tx_setimage1(DWORD data);
	void bp_tx_setimage2(DWORD data);
	void bp_tx_setimage3(DWORD data);
	void bp_tx_settlut(DWORD data);
	void bp_tlut_load0(DWORD data);
	void bp_tlut_load1(DWORD data);
	void bp_gen_mode(DWORD data);
	void bp_tev_order(DWORD data);
	void bp_tev_color(DWORD data);
	void bp_tev_alpha(DWORD data);
	void bp_tev_alphafunc(DWORD data);
	void bp_tev_register_l(DWORD data);
	void bp_tev_register_h(DWORD data);
	void bp_tev_ksel(DWORD data);
	void bp_mask(DWORD data);
	void bp_su_lpsize(DWORD data);
	void bp_su_scis0(DWORD data);
	void bp_su_scis1(DWORD data);
	void bp_scissor_offset(DWORD data);

	void cpload(BYTE reg, DWORD data);

	//XF stuff
	void load_xf_indx(char name, BYTE array, BYTE stride);
	void dlr_load_xf_indx(char name, BYTE array, BYTE stride);
	//T is a handler policy; XFHandlerInterp and XFHandlerRec are befriended
	//below, presumably as the two policies (TODO confirm).
	template<class T> void handle_xf_multi(WORD length, WORD base, const DWORD *data);
	template<class T> void handle_xf_single(WORD base, DWORD data);
	friend class XFHandlerInterp;
	friend class XFHandlerRec;

	void xfr_clipdisable(DWORD d);
	void xfr_perf0(DWORD d);

	void xfr_invtxspec(DWORD d);
	void xfr_numchannels(DWORD d);
	void xfr_ambient(DWORD index, DWORD d);
	void xfr_material(DWORD index, DWORD d);
	void xfr_channel(DWORD index, DWORD d);
	void xfr_matrixindex0(DWORD d);
	void xfr_matrixindex1(DWORD d);
	void xfr_numtexgens(DWORD d);
	void xfr_texgen(DWORD index, DWORD data);
	void xfr_dualtexgen(DWORD index, DWORD data);
	void xfm_identity(const DWORD *data);
	void xfr_lightcol(DWORD index, DWORD d);
	void xfm_lightgeo(DWORD index, const DWORD *data);
	void xfm_light0att(const DWORD *data);
	void xfm_light0posdir(const DWORD *data);
	void xfm_projmatrix(const DWORD *data);
	void xfm_viewport(const DWORD *data);

	//stuff not directly connected to opcodes are below
	void do_texmatindex(int i, BYTE index);

	//Compiled display lists, stored in a hash map with a DWORD key.
	typedef std::pair<DWORD, CompiledDisplayList> CDL_PAIR; //Compiled DisplayList
	typedef stdext::hash_map<DWORD, CompiledDisplayList> CDL_HASHMAP;
	CDL_HASHMAP m_cdl_map;
	CDL_HASHMAP::iterator compileList();
	void runCompiledList();

	//drawing stuff
	//T is a per-primitive policy class; the Draw* classes befriended below
	//are presumably those policies (TODO confirm).
	template<class T> void draw_primitive();
	template<class T> void dlr_draw_primitive();
	void setStreamSource(LPDIRECT3DVERTEXBUFFER9 vertexBuffer, UINT convertexSize);
	friend class DrawQuads;
	friend class DrawTriangles;
	friend class DrawTriangleStrip;
	friend class DrawTriangleFan;
	friend class DrawLines;
	friend class DrawLineStrip;
	friend class DrawPoints;

	//Fills 'results' for 'numvertices' vertices; 'primstr' names the primitive.
	void getConvertices(BYTE vat, WORD numvertices, const char* primstr,
		CONVERTICES& results);
	void do_stuff_before_draw(DWORD FVF, WORD midx);
	HRESULT beginScene();
	HRESULT setClearState();
	HRESULT setScissorTest();

	//Per-texture-slot state; eight slots.
	struct TX {
		bool changed;
		DWORD address, width, height, format, tlutformat, tlutoffset;
		DWORD even_offset, odd_offset;
		DWORD even_size, odd_size;
		bool preloaded;
	} tx[8];

	/*#define TEX_CACHE_NLINES 32 //(1*M)/(32*K)	//hyperlines? :}
	struct {
	DWORD lines[TEX_CACHE_NLINES];
	} tcache;
	void do_cache_line_create(DWORD address, DWORD line);
	void do_cache_line_select(DWORD address, DWORD line);*/
	void do_texture(BYTE index);
	LPDIRECT3DTEXTURE9 createTexture(DWORD address, DWORD width, DWORD height,
		DWORD format, DWORD tlutformat, DWORD tlutoffset, int nchanges);
	void createLoadTexture(LPDIRECT3DTEXTURE9 *ppTex, UINT width, UINT height,
		D3DFORMAT format, UINT pitch, const void *src);
	HRESULT checkTextureRequirements(UINT width, UINT height, D3DFORMAT format,
		DWORD usage);
	//bool isMatch(LPDIRECT3DTEXTURE9 pTex, UINT width, UINT height, D3DFORMAT format,
	//DWORD usage);
	bool prepareTexDumpBaseName(std::string& s, DWORD address, int nchanges);

	//Texture cache entry: the D3D texture plus a change counter.
	struct CTEXTURE {
		LPDIRECT3DTEXTURE9 p;
		int nchanges;
		/*DWORD width, height;
		D3DFORMAT d3dformat;
		bool rt;*/
	};
	typedef std::pair<DWORD, CTEXTURE> CTX_PAIR; //Cached TeXture
	typedef stdext::hash_map<DWORD, CTEXTURE> CTX_HASHMAP;
	CTX_HASHMAP m_ctx_map;

	//Sampler-state, render-state and texture-stage-state setters.
	HRESULT setSS(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value);
	HRESULT setRS(D3DRENDERSTATETYPE State, DWORD Value);
	HRESULT setTSS(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value);

	//Compiles high-level shader source for the given profile into *ppCode.
	HRESULT compileShader(const std::string& hlShader, const char *profile,
		LPD3DXBUFFER *ppCode);

	//Key of the vertex shader cache (vs_map).
	//NOTE(review): member order and bit-field widths define the key's memory
	//layout; do not reorder without checking my_hash_compare<VS_KEY>.
	struct VS_KEY {
		struct TEXGEN { //size 3
			/*bool use_tmatrix, input_3d, projection;
			BYTE source;*/
			BYTE emboss_light : 3;
			BYTE emboss_source : 3;
			BYTE : 1;	//unnamed padding bit
			BYTE source : 4;
			BYTE type : 2;
			BYTE : 1;	//unnamed padding bit
			BYTE input_3d : 1;
			BYTE projection : 1;
			BYTE : 1;	//unnamed padding bit
		} texgen[8];

		//Bit-field view and raw 16-bit view of one channel's settings.
		union CHANNEL {  //size 2
			struct {
				/*BYTE diffuse_func;
				bool lit, material_source_vertex, ambient_source_vertex, atten_enabled, atten_func;
				BYTE lightmask;*/
				WORD material_source_vertex : 1;
				WORD lit : 1;
				WORD light0 : 1;
				WORD light1 : 1;
				WORD light2 : 1;
				WORD light3 : 1;
				WORD ambient_source_vertex : 1;
				WORD diffuse_func : 2;
				WORD atten_enabled : 1;
				WORD atten_func : 1;
				WORD light4 : 1;
				WORD light5 : 1;
				WORD light6 : 1;
				WORD light7 : 1;
			};
			WORD word;
		} color[2], alpha[2];

		//Bit-field view and raw byte view of the texture/normal/color counts.
		union INVTXSPEC {	//size 1
			struct {
				BYTE ntex : 4;
				BYTE nnrm : 2;
				BYTE ncol : 2;
			};
			BYTE byte;
		} host;
		//make these into bitfields
		BYTE ntex, ncol;
		BYTE matrix_index[9];
		WORD midx;

		DWORD FVF;
		bool force_white;
	} vs_key;
	//Value stored in the vertex shader cache.
	struct VS_DATA {
		LPDIRECT3DVERTEXDECLARATION9 pVertexDeclaration;
		LPDIRECT3DVERTEXSHADER9 pVertexShader;
		DWORD FVFout;
	};
	struct VS_CONTROL {
		bool set, set_proj;	//updater
		bool cull_all, lo_noop; //drawing

		D3DXMATRIX matProj;	//matWorldView
	} vs_control;
	//Traits class with the shape expected by stdext::hash_map: a unary
	//operator() (hash), a binary operator() (key comparison) and the
	//bucket_size/min_buckets parameters.
	template<class T> class my_hash_compare {
	public:
		size_t operator()(const T& key) const;
		bool operator()(const T& key1, const T& key2) const;
		enum {	// parameters for hash table
			bucket_size = 1,	// bucket_size > 0
			min_buckets = 16};
	private:
		size_t hash_value(const T& key) const;
	};
	typedef stdext::hash_map<VS_KEY, VS_DATA, my_hash_compare<VS_KEY> > VSHashMap;
	VSHashMap vs_map;
	VSHashMap::const_iterator vs_current;

	void setVertexShader(DWORD FVF, WORD midx);
	//Writes vertex shader source for vs_key to sstr; FVFout is an output.
	static void generateVertexShader(std::ostream& sstr, const VS_KEY &vs_key, DWORD &FVFout);
	static void vs_do_channel(std::ostream& sstr, const char *chstr,
		const VS_KEY::CHANNEL &channel, int i, const VS_KEY &vs_key);
	void setWVMatrix(DWORD index, const float *data);
	void setTMatrix(DWORD index, const float *data, bool three);
	void setNMatrix(DWORD index, const float *data);

#define GP_TEV_NSTAGES 16
	//Key of the pixel shader cache (ps_map).
	//NOTE(review): as with VS_KEY, the layout is part of the key; do not
	//reorder members without checking my_hash_compare<PS_KEY>.
	struct PS_KEY {
		struct STAGE {
			struct COLOR {  //size 3
				BYTE dest : 2;
				BYTE scale : 2;
				BYTE clamp : 1;
				BYTE sub : 1;
				BYTE bias : 2;
				BYTE a : 4;
				BYTE b : 4;
				BYTE c : 4;
				BYTE d : 4;
			} c;
			//NOTE(review): the fields total 24 bits, but a+b use 6 bits of
			//the second byte and the 3-bit c does not fit in the remaining
			//2. If the compiler does not let a bit-field straddle BYTE
			//units, sizeof(ALPHA) is 4, not 3 - confirm with sizeof.
			struct ALPHA {  //size 3
				BYTE dest : 2;
				BYTE scale : 2;
				BYTE clamp : 1;
				BYTE sub : 1;
				BYTE bias : 2;
				BYTE a : 3;
				BYTE b : 3;
				BYTE c : 3;
				BYTE d : 3;
				BYTE swapSelTex : 2;
				BYTE swapSelRas : 2;
			} a;
			struct ORDER {  //size 2
				BYTE texMap : 3;
				BYTE texCoord : 3;
				BYTE texEnabled : 1;
				BYTE rasCID : 3;
			} o;
			BYTE kSelColor, kSelAlpha;

			static bool stage_is_combined(const COLOR &color,	const ALPHA &alpha);
		} stage[GP_TEV_NSTAGES];
		//Two overlapping views of one byte: four 2-bit fields, or two
		//4-bit fields (ab overlays b and a, gr overlays r and g).
		union SWAP_ENTRY {	//size 1
			//TevColorChan r, g, b, a;
			struct {
				BYTE b : 2;
				BYTE a : 2;
				BYTE r : 2;
				BYTE g : 2;
			};
			struct {
				BYTE ab : 4;
				BYTE gr : 4;
			};
		} swap[4];
		struct ALPHAFUNC {
			BYTE op : 2;
			BYTE comp1 : 3;
			BYTE comp0 : 3;
			BYTE a1 : 8;
			BYTE a0 : 8;
		} alphafunc;
		BYTE ntev, ncol, ntex;
	} ps_key;
	struct PS_CONTROL {
		bool set;
		bool reg_set[4];
		//Four byte components overlaid on one DWORD; 'a' is the
		//lowest-addressed byte.
		union TEV_REG {
			struct {
				BYTE a, b, g, r;
			};
			DWORD dword;
		} reg[4];
	} ps_control;
	//Value stored in the pixel shader cache.
	struct PS_DATA {
		LPDIRECT3DPIXELSHADER9 pPixelShader;
	};
	typedef stdext::hash_map<PS_KEY, PS_DATA, my_hash_compare<PS_KEY> > PSHashMap;
	PSHashMap ps_map;
	PSHashMap::const_iterator ps_current;

	void setPixelProcessing();
	void setPixelShader();
	//Writes pixel shader source for ps_key to sstr.
	static void generatePixelShader(std::ostream& sstr, const PS_KEY &ps_key);
	void setTextureStageStates();
	void handle_tev_reg_load(int i);

	struct TLUT {
		BYTE load;
		DWORD address, offset, count;
	} tlut;
	void load_tlut();

	//Plain-data state block.
	//NOTE(review): the name suggests the whole struct is cleared as a unit
	//(e.g. with memset) - confirm in the constructor, and keep members with
	//non-trivial constructors out of it.
	struct ZEROABLE {
		LPDIRECT3DDEVICE9 pd3dDevice;
		bool in_scene, in_list;//, do_visi_at_nop;
		MYFILETIME ftActivate;
		bool vii[4];

		HANDLE hThread, hEvent;
		const BYTE *pFifo;
		DWORD alignedFifoEnd, fifoSize, fifoStopPt;
		GP_FIFO fifo;

		DWORD cp_reg[256];
		DWORD bp_reg[256];	//BP registers are really 24-bit, but it would be inefficient to save 256 bytes here.
		DWORD bp_mask;
#define XF_REG_SIZE 0x58
#define XF_MEM_SIZE 0x1000
#define XF_TOTAL_SIZE (XF_MEM_SIZE + XF_REG_SIZE)
		//XF memory followed by XF registers, one DWORD per entry.
		DWORD xf_mem[XF_TOTAL_SIZE];

		BYTE tmem[1*M];

		//int ntex, ncol;

		struct SCISSOR {
			bool changed;
			int top, left, bottom, right, xoffset, yoffset;
		} scissor;

		D3DCULL cullmode;
	} m;
	std::string mDumpTexDir;


	//This can be changed to a thread-based system.
	//The thread would be created and controlled by the GP class.
	//There should also be an option to wait until the GP thread is complete before
	//continuing execution.
	//NOTE(review): hThread, endThread() and ThreadFunc suggest the thread-based
	//system already exists; the comment above may be stale - confirm.
	static unsigned __stdcall ThreadFunc(void *arg);
};

#endif	//GP_H
